__license__   = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'

'''
Code for the conversion of ebook formats and the reading of metadata
from various formats.
'''

import numbers
import os
import re
import sys
from contextlib import suppress

from calibre import prints
from calibre.ebooks.chardet import xml_to_unicode


class ConversionError(Exception):
    '''
    Exception carrying a message plus an ``only_msg`` flag.

    ``msg`` is passed to the base ``Exception``; ``only_msg`` is stored
    unchanged on the instance. Nothing in this module reads the flag --
    presumably handlers use it to decide whether to show just the message
    rather than a full traceback (confirm at the call sites).
    '''

    def __init__(self, msg, only_msg=False):
        super().__init__(msg)
        self.only_msg = only_msg


class UnknownFormatError(Exception):
    '''
    Error type for an ebook format that is not recognised.

    Not raised anywhere in this module; presumably raised by format
    detection code elsewhere (confirm at the call sites).
    '''


class DRMError(ValueError):
    '''
    ``ValueError`` subclass for DRM-related failures.

    Not raised anywhere in this module; presumably raised by readers that
    encounter DRM-protected books (confirm at the call sites).
    '''


class ParserError(ValueError):
    '''
    ``ValueError`` subclass for parsing failures.

    Not raised anywhere in this module; which parsers raise it must be
    checked at the call sites.
    '''


# File extensions (lowercase, no leading dot) that this module lists as book
# formats. The order of the entries is preserved as-is.
BOOK_EXTENSIONS = [
    'lrf', 'rar', 'zip', 'rtf', 'lit', 'txt', 'txtz', 'text',
    'htm', 'xhtm', 'html', 'htmlz', 'xhtml', 'pdf', 'pdb', 'updb',
    'pdr', 'prc', 'mobi', 'azw', 'doc', 'epub', 'fb2', 'fbz',
    'djv', 'djvu', 'lrx', 'cbr', 'cb7', 'cbz', 'cbc', 'oebzip',
    'rb', 'imp', 'odt', 'chm', 'tpz', 'azw1', 'pml', 'pmlz',
    'mbp', 'tan', 'snb', 'xps', 'oxps', 'azw4', 'book', 'zbf',
    'pobi', 'docx', 'docm', 'md', 'textile', 'markdown', 'ibook', 'ibooks',
    'iba', 'azw3', 'ps', 'kepub', 'kfx', 'kpf',
]


def return_raster_image(path):
    '''
    Return the raw bytes of the file at ``path`` when it is a non-SVG image.

    The file is read only if ``os.access`` reports it readable. Its contents
    are returned when ``what`` identifies them as anything other than
    ``None`` or ``'svg'``; in every other case the result is ``None``.
    '''
    from calibre.utils.imghdr import what
    if not os.access(path, os.R_OK):
        return None
    with open(path, 'rb') as src:
        data = src.read()
    detected = what(None, data)
    if detected in (None, 'svg'):
        return None
    return data


def extract_cover_from_embedded_svg(html, base, log):
    '''
    Return the image referenced by a lone ``<svg>`` wrapper, if any.

    ``html`` is parsed as XML. The lookup succeeds only when the document
    contains exactly one ``svg`` element whose single child is an SVG
    ``image`` element with a non-empty xlink ``href``. The href is split on
    ``/`` and joined onto ``base``, and the resulting path is handed to
    ``return_raster_image``. Returns ``None`` when any condition fails.

    ``log`` is accepted but not used by this function.
    '''
    from calibre.ebooks.oeb.base import SVG, XLINK, XPath
    from calibre.utils.xml_parse import safe_xml_fromstring
    root = safe_xml_fromstring(html)

    svg_nodes = XPath('//svg:svg')(root)
    if len(svg_nodes) != 1:
        return None
    wrapper = svg_nodes[0]
    if len(wrapper) != 1:
        return None
    image = wrapper[0]
    if image.tag != SVG('image'):
        return None
    href = image.get(XLINK('href'), None)
    if not href:
        return None
    return return_raster_image(os.path.join(base, *href.split('/')))


def extract_calibre_cover(raw, base, log):
    '''
    Return the cover image from a simple HTML cover page, or ``None``.

    Nothing is extracted if the markup contains any heading, ``p``,
    ``span``, ``font`` or ``br`` tag. Otherwise two layouts are tried:

    1. the whole document has exactly one ``<img src=...>`` whose ``alt``
       equals ``cover`` (case-insensitive);
    2. the ``<body>`` has no non-whitespace text and exactly one
       ``<img src=...>``.

    Image paths are resolved against ``base`` and loaded through
    ``return_raster_image``. ``log`` is accepted but not used here.
    '''
    from calibre.ebooks.BeautifulSoup import BeautifulSoup

    def _load(src):
        # src uses '/' separators; rebuild it as a path below base
        return return_raster_image(os.path.join(base, *src.split('/')))

    soup = BeautifulSoup(raw)
    text_tag = soup.find(name=['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'span',
        'font', 'br'])
    all_images = soup.findAll('img', src=True)
    if text_tag is not None:
        return None

    if len(all_images) == 1 and all_images[0].get('alt', '').lower()=='cover':
        data = _load(all_images[0]['src'])
        if data is not None:
            return data

    # Look for a simple cover, i.e. a body with no text and only one <img> tag
    body = soup.find('body')
    if body is None:
        return None
    body_text = ''.join(map(str, body.findAll(text=True)))
    if body_text.strip():
        # Body has text, abort
        return None
    body_images = body.findAll('img', src=True)
    if len(body_images) != 1:
        return None
    return _load(body_images[0]['src'])


def render_html_svg_workaround(path_to_html, log, width=590, height=750, root=''):
    '''
    Obtain cover image data for an HTML file, avoiding a render if possible.

    The file is read and decoded with ``xml_to_unicode``. If the text
    mentions the SVG namespace, ``extract_cover_from_embedded_svg`` is tried
    first; then ``extract_calibre_cover``. Any exception raised by an
    extractor is swallowed and treated as "nothing found". When neither
    yields data, the page is rendered with ``render_html_data``, whose
    result (possibly ``None``) is returned.
    '''
    from calibre.ebooks.oeb.base import SVG_NS
    with open(path_to_html, 'rb') as f:
        raw = f.read()
    raw = xml_to_unicode(raw, strip_encoding_pats=True)[0]
    base_dir = os.path.dirname(path_to_html)

    extractors = []
    if SVG_NS in raw:
        extractors.append(extract_cover_from_embedded_svg)
    extractors.append(extract_calibre_cover)

    for extractor in extractors:
        try:
            data = extractor(raw, base_dir, log)
        except Exception:
            # best effort: a failing extractor just means we try the next option
            continue
        if data is not None:
            return data

    return render_html_data(path_to_html, width, height, root=root)


def render_html_data(path_to_html, width, height, root=''):
    '''
    Render an HTML file to JPEG in a worker process and return the bytes.

    ``calibre.ebooks.render_html.main`` is run through ``fork_job`` with the
    arguments ``(path_to_html, tdir, 'jpeg', root)``, where ``tdir`` is a
    temporary directory. If the job reports a truthy ``'result'``, the
    contents of ``rendered.jpeg`` in that directory are returned. Otherwise
    (or if the worker raises ``WorkerError``) diagnostics are printed to
    stderr and ``None`` is returned.

    ``width`` and ``height`` are accepted but not used by this function.
    '''
    from calibre.ptempfile import TemporaryDirectory
    from calibre.utils.ipc.simple_worker import WorkerError, fork_job

    def report_error(job, text=''):
        # job is the fork_job result, or an empty dict when the worker itself failed
        prints('Failed to render', path_to_html, 'with errors:', file=sys.stderr)
        if text:
            prints(text, file=sys.stderr)
        if job and job['stdout_stderr']:
            with open(job['stdout_stderr'], 'rb') as f:
                prints(f.read(), file=sys.stderr)

    with TemporaryDirectory('-render-html') as tdir:
        try:
            job = fork_job('calibre.ebooks.render_html', 'main', args=(path_to_html, tdir, 'jpeg', root))
        except WorkerError as e:
            report_error({}, e.orig_tb)
            return None
        if not job['result']:
            report_error(job)
            return None
        with open(os.path.join(tdir, 'rendered.jpeg'), 'rb') as f:
            return f.read()


def check_ebook_format(stream, current_guess):
    '''
    Refine a guessed format by sniffing the start of ``stream``.

    Only guesses of ``prc``, ``mobi``, ``azw``, ``azw1`` or ``azw3``
    (compared case-insensitively) are inspected: if the first three bytes
    are ``b'TPZ'`` the answer becomes ``'tpz'``. For those guesses the
    stream is rewound to offset 0 afterwards; for any other guess the
    stream is left untouched and the guess is returned unchanged.
    '''
    if current_guess.lower() not in ('prc', 'mobi', 'azw', 'azw1', 'azw3'):
        return current_guess
    stream.seek(0)
    magic = stream.read(3)
    stream.seek(0)
    return 'tpz' if magic == b'TPZ' else current_guess


def normalize(x):
    '''
    Return ``x`` in Unicode NFC form if it is a ``str``; any other value
    (including ``None`` and ``bytes``) is returned unchanged.
    '''
    if not isinstance(x, str):
        return x
    import unicodedata
    return unicodedata.normalize('NFC', x)


def calibre_cover(title, author_string, series_string=None,
        output_format='jpg', title_size=46, author_size=36, logo_path=None):
    '''
    Generate a cover image and return it encoded as ``output_format``.

    The three text arguments are NFC-normalised; empty or ``None`` author
    and series strings are passed on as ``''``. The image is produced by
    ``calibre_cover2`` (as a QImage) and serialised with ``image_to_data``.

    ``title_size`` and ``author_size`` are accepted but not used by this
    function.
    '''
    title, author_string, series_string = (
        normalize(text) for text in (title, author_string, series_string))
    from calibre.ebooks.covers import calibre_cover2
    from calibre.utils.img import image_to_data
    cover_image = calibre_cover2(
        title, author_string or '', series_string or '',
        logo_path=logo_path, as_qimage=True)
    return image_to_data(cover_image, fmt=output_format)


# Matches a CSS-style length: a numeric part (group 1), optional whitespace,
# then exactly one of the listed lowercase units (group 2).
# Group 1 is loose: it may be empty, or a non-number such as '-', '.' or
# '--1', so users of this pattern have to check it and be ready for float()
# to reject it.
UNIT_RE = re.compile(r'^(-*[0-9]*[.]?[0-9]*)\s*(%|em|ex|en|px|mm|cm|in|pt|pc|rem|q)$')


def unit_convert(value, base, font, dpi, body_font_size=12):
    '''
    Return ``value`` in pts.

    * Numbers are returned unchanged.
    * Anything ``float()`` accepts (e.g. ``'96'``) is treated as a pixel
      count and scaled by ``72 / dpi``.
    * Strings matching ``UNIT_RE`` are converted according to their unit:
      ``%`` is relative to ``base``, ``em``/``ex``/``en`` to ``font``,
      ``rem`` to ``body_font_size``, ``px`` to ``dpi``; the remaining units
      use fixed factors. A numeric part that ``float()`` rejects counts as 0.
    * Any other string is returned unchanged.
    '''
    if isinstance(value, numbers.Number):
        return value

    # A bare number is interpreted as pixels. Every failure here (not a
    # number, dpi of zero, ...) simply falls through to the unit parsing.
    try:
        return float(value) * 72.0 / dpi
    except Exception:
        pass

    match = UNIT_RE.match(value)
    if match is None or not match.group(1):
        return value

    number_text, unit = match.groups()
    try:
        number = float(number_text)
    except ValueError:
        number = 0

    # Units with a fixed size in points
    fixed_factors = {
        'in': 72.0,
        'pc': 12.0,
        'mm': 2.8346456693,
        'cm': 28.346456693,
        'q': 0.708661417325,
    }
    if unit in fixed_factors:
        return number * fixed_factors[unit]
    if unit == 'pt':
        return number
    if unit == 'px':
        return number * 72.0 / dpi
    if unit == '%':
        return (number / 100.0) * base
    if unit == 'em':
        return number * font
    if unit == 'rem':
        return number * body_font_size
    if unit in ('ex', 'en'):
        # This is a hack for ex since we have no way to know
        # the x-height of the font
        return number * font * 0.5
    return value


def parse_css_length(value):
    '''
    Split a CSS length such as ``'1.5em'`` into ``(number, unit)``.

    Returns a ``(float, str)`` tuple with the unit in lowercase, or
    ``(None, None)`` when ``value`` is not a length: it is not a string,
    does not match ``UNIT_RE``, or has a numeric part that is empty or not
    a valid number.
    '''
    try:
        m = UNIT_RE.match(value)
    except TypeError:
        # value is not a string (e.g. None or a number)
        return None, None
    if m is None or not m.group(1):
        return None, None
    try:
        number = float(m.group(1))
    except ValueError:
        # UNIT_RE's numeric group also accepts things like '-', '.' or '--1'
        # that float() rejects; report them as "not a length" instead of
        # letting the ValueError escape (unit_convert guards the same case).
        return None, None
    return number, m.group(2).lower()


def generate_masthead(title, output_path=None, width=600, height=60):
    '''
    Create a masthead image for ``title`` using the configured font.

    The font family is read from the ``masthead_font`` entry of the
    ``mobi_output`` defaults (``None`` if absent) and the work is delegated
    to ``calibre.ebooks.covers.generate_masthead``, whose return value is
    passed back unchanged.
    '''
    from calibre.ebooks.conversion.config import load_defaults
    font_family = load_defaults('mobi_output').get('masthead_font', None)
    from calibre.ebooks.covers import generate_masthead as render_masthead
    return render_masthead(
        title, output_path=output_path, width=width, height=height,
        font_family=font_family)


def escape_xpath_attr(value):
    '''
    Quote ``value`` so it can be embedded as a string in an XPath expression.

    * No double quote in ``value``: wrap it in double quotes.
    * Double quotes but no single quote: wrap it in single quotes.
    * Both kinds: build a ``concat(...)`` call in which every run of double
      quotes is wrapped in single quotes and every other piece in double
      quotes.
    '''
    if '"' not in value:
        return '"%s"' % value
    if "'" not in value:
        return "'%s'" % value
    # The capturing group makes re.split keep the runs of double quotes
    pieces = (chunk for chunk in re.split('("+)', value) if chunk)
    quoted = (
        ("'" + chunk + "'") if '"' in chunk else ('"' + chunk + '"')
        for chunk in pieces
    )
    return 'concat(%s)' % ', '.join(quoted)
